package com.example.resume_management_system.service;

import com.alibaba.fastjson.JSON;
import com.example.resume_management_system.Variable.Variable;
import com.example.resume_management_system.pojo.EsFile;
import com.example.resume_management_system.pojo.fileInfo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.http.HttpHost;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import static org.elasticsearch.client.RequestOptions.DEFAULT;

/**
 * Service that indexes the text of Word documents into the Elasticsearch
 * {@code "file"} index and looks documents up by their content.
 *
 * <p>Documents are indexed under an id equal to their full file path, so a
 * search hit's id can be used directly as the path of the matching file.
 */
@Service
public class EsServiceImpl {
    // Client built once per service instance from the host/port in Variable.
    // NOTE(review): nothing in this class closes the client — confirm whether
    // it should be released on application shutdown.
    RestHighLevelClient restHighLevelClient= new RestHighLevelClient(RestClient.builder(new HttpHost(Variable.esIp,Variable.esPort,"http")));
    @Autowired
    FileInfoServicelmpl fileInfoServicelmpl;

    /**
     * Extracts the plain text of a Word document read through the Hadoop
     * {@link FileSystem} API.
     *
     * @param strpath full path of the document; it is also used as the value
     *                of the {@code fs.default.name} configuration entry
     * @return the text returned by {@link WordExtractor#getText()}
     * @throws IOException if the file cannot be opened or parsed
     */
    public String getFileText(String strpath) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", strpath);
        // The FileSystem handle is intentionally not closed here:
        // FileSystem.get may hand out a cached instance shared with other
        // callers (see Hadoop docs) — closing it could break them.
        FileSystem fs = FileSystem.get(conf);
        // try-with-resources guarantees the stream is released even when
        // parsing the document fails.
        try (InputStream inputStream = fs.open(new Path(strpath))) {
            HWPFDocument document = new HWPFDocument(inputStream);
            WordExtractor extractor = new WordExtractor(document);
            return extractor.getText();
        }
    }


    /**
     * Indexes one file into the {@code "file"} index, using the file path as
     * the document id.
     *
     * <p>{@code filePath} must be the complete path. If the front end sends a
     * path that starts from the root directory, the caller has to build the
     * complete path before passing it in.
     *
     * @param filePath complete path of the file to index
     * @throws IOException if reading the file or the index request fails
     */
    public void putData(String filePath) throws IOException {
        IndexRequest request = new IndexRequest("file");
        request.id(filePath);
        // Everything after the last '/' is the file name; when there is no
        // '/', lastIndexOf returns -1 and the whole string is used.
        String fileName = filePath.substring(filePath.lastIndexOf('/') + 1);
        EsFile esFile = new EsFile(fileName, getFileText(filePath));
        request.source(JSON.toJSONString(esFile), XContentType.JSON);
        restHighLevelClient.index(request, DEFAULT);
    }


    /**
     * Runs a match query for {@code searchStr} against the
     * {@code fileContent} field and returns the ids of the hits, which are
     * the file paths the documents were indexed under.
     *
     * <p>No result size is set on the request, so only the hits contained in
     * the search response are returned.
     *
     * @param searchStr keyword(s) to match against the file content
     * @return paths (document ids) of the matching files, in hit order
     * @throws IOException if the search request fails
     */
    public List<String> getFilePathBySearchStr(String searchStr) throws IOException {
        List<String> list = new LinkedList<String>();
        SearchRequest searchRequest = new SearchRequest("file");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery("fileContent", searchStr);
        sourceBuilder.query(matchQueryBuilder);
        searchRequest.source(sourceBuilder);
        for (SearchHit hit : restHighLevelClient.search(searchRequest, DEFAULT).getHits()) {
            list.add(hit.getId());
        }
        return list;
    }


    /**
     * Converts a list of file paths into the corresponding file-info records
     * by looking each path up through {@code fileInfoServicelmpl}.
     *
     * @param pathList file paths, e.g. as returned by
     *                 {@link #getFilePathBySearchStr(String)}
     * @return one {@code fileInfo} per path, in the same order
     */
    public List<fileInfo> getFileInfoListByFilePathList (List<String> pathList){
        List<fileInfo> list = new LinkedList<fileInfo>();
        for (String filePath : pathList) {
            // Only the first record of each lookup is used.
            // NOTE(review): presumably this fails when the lookup returns no
            // entries (e.g. a path that is indexed but no longer stored) —
            // confirm how fileinfo() behaves for unknown paths.
            list.add((fileInfo) fileInfoServicelmpl.fileinfo(filePath).get(0));
        }
        return list;
    }
}
